# Copyright (C) 2017-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information import random from hypothesis import given from django.utils.html import escape from swh.model.identifiers import ObjectType from swh.web.browse.snapshot_context import process_snapshot_branches from swh.web.browse.utils import ( _re_encode_content, get_mimetype_and_encoding_for_content, prepare_content_for_display, ) from swh.web.common.exc import NotFoundExc from swh.web.common.identifiers import gen_swhid from swh.web.common.utils import gen_path_info, reverse from swh.web.tests.django_asserts import assert_contains, assert_not_contains from swh.web.tests.strategies import ( content, content_application_no_highlight, content_image_type, content_text, content_text_no_highlight, content_text_non_utf8, content_unsupported_image_type_rendering, content_utf8_detected_as_binary, invalid_sha1, origin_with_multiple_visits, unknown_content, ) from swh.web.tests.utils import check_html_get_response, check_http_get_response @given(content_text()) def test_content_view_text(client, archive_data, content): sha1_git = content["sha1_git"] url = reverse( "browse-content", url_args={"query_string": content["sha1"]}, query_params={"path": content["path"]}, ) url_raw = reverse("browse-content-raw", url_args={"query_string": content["sha1"]}) resp = check_html_get_response( client, url, status_code=200, template_used="browse/content.html" ) content_display = _process_content_for_display(archive_data, content) mimetype = content_display["mimetype"] if mimetype.startswith("text/"): assert_contains(resp, '

' % content_display["language"])
        assert_contains(resp, escape(content_display["content_data"]))
    assert_contains(resp, url_raw)

    swh_cnt_id = gen_swhid(ObjectType.CONTENT, sha1_git)
    swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id})
    assert_contains(resp, swh_cnt_id)
    assert_contains(resp, swh_cnt_id_url)
    assert_not_contains(resp, "swh-metadata-popover")


@given(content_application_no_highlight(), content_text_no_highlight())
def test_content_view_no_highlight(client, archive_data, content_app, content_text):
    for content_ in (content_app, content_text):
        content = content_
        sha1_git = content["sha1_git"]

        url = reverse("browse-content", url_args={"query_string": content["sha1"]})

        url_raw = reverse(
            "browse-content-raw", url_args={"query_string": content["sha1"]}
        )

        resp = check_html_get_response(
            client, url, status_code=200, template_used="browse/content.html"
        )

        content_display = _process_content_for_display(archive_data, content)

        assert_contains(resp, '')
        assert_contains(resp, escape(content_display["content_data"]))
        assert_contains(resp, url_raw)

        swh_cnt_id = gen_swhid(ObjectType.CONTENT, sha1_git)
        swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id})

        assert_contains(resp, swh_cnt_id)
        assert_contains(resp, swh_cnt_id_url)


@given(content_text_non_utf8())
def test_content_view_no_utf8_text(client, archive_data, content):
    sha1_git = content["sha1_git"]

    url = reverse("browse-content", url_args={"query_string": content["sha1"]})

    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )

    content_display = _process_content_for_display(archive_data, content)

    swh_cnt_id = gen_swhid(ObjectType.CONTENT, sha1_git)
    swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id})
    assert_contains(resp, swh_cnt_id_url)
    assert_contains(resp, escape(content_display["content_data"]))


@given(content_image_type())
def test_content_view_image(client, archive_data, content):
    url = reverse("browse-content", url_args={"query_string": content["sha1"]})

    url_raw = reverse("browse-content-raw", url_args={"query_string": content["sha1"]})

    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )

    content_display = _process_content_for_display(archive_data, content)
    mimetype = content_display["mimetype"]
    content_data = content_display["content_data"]
    assert_contains(resp, '' % (mimetype, content_data))
    assert_contains(resp, url_raw)


@given(content_unsupported_image_type_rendering())
def test_content_view_image_no_rendering(client, archive_data, content):
    url = reverse("browse-content", url_args={"query_string": content["sha1"]})

    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )

    mimetype = content["mimetype"]
    encoding = content["encoding"]
    assert_contains(
        resp,
        (
            f"Content with mime type {mimetype} and encoding {encoding} "
            "cannot be displayed."
        ),
    )


@given(content_text())
def test_content_view_text_with_path(client, archive_data, content):
    path = content["path"]

    url = reverse(
        "browse-content",
        url_args={"query_string": content["sha1"]},
        query_params={"path": path},
    )

    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )

    assert_contains(resp, '' % hljs_language)
        assert_contains(resp, escape(content_display["content_data"]))

    split_path = path.split("/")

    root_dir_sha1 = split_path[0]
    filename = split_path[-1]
    path = path.replace(root_dir_sha1 + "/", "").replace(filename, "")

    swhid_context = {
        "anchor": gen_swhid(ObjectType.DIRECTORY, root_dir_sha1),
        "path": f"/{path}{filename}",
    }

    swh_cnt_id = gen_swhid(
        ObjectType.CONTENT, content["sha1_git"], metadata=swhid_context
    )
    swh_cnt_id_url = reverse("browse-swhid", url_args={"swhid": swh_cnt_id})
    assert_contains(resp, swh_cnt_id)
    assert_contains(resp, swh_cnt_id_url)

    path_info = gen_path_info(path)

    root_dir_url = reverse("browse-directory", url_args={"sha1_git": root_dir_sha1})

    assert_contains(resp, '', count=len(path_info) + 1)

    assert_contains(
        resp, '' + root_dir_sha1[:7] + ""
    )

    for p in path_info:
        dir_url = reverse(
            "browse-directory",
            url_args={"sha1_git": root_dir_sha1},
            query_params={"path": p["path"]},
        )
        assert_contains(resp, '' + p["name"] + "")

    assert_contains(resp, "
" + filename + "")

    url_raw = reverse(
        "browse-content-raw",
        url_args={"query_string": content["sha1"]},
        query_params={"filename": filename},
    )
    assert_contains(resp, url_raw)

    url = reverse(
        "browse-content",
        url_args={"query_string": content["sha1"]},
        query_params={"path": filename},
    )

    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )

    assert_not_contains(resp, '{branch_info['name']}")

    cnt_swhid = gen_swhid(
        ObjectType.CONTENT,
        directory_file["checksums"]["sha1_git"],
        metadata={
            "origin": origin["url"],
            "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
            "anchor": gen_swhid(ObjectType.REVISION, branch_info["revision"]),
            "path": f"/{directory_file['name']}",
        },
    )
    assert_contains(resp, cnt_swhid)

    dir_swhid = gen_swhid(
        ObjectType.DIRECTORY,
        directory,
        metadata={
            "origin": origin["url"],
            "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
            "anchor": gen_swhid(ObjectType.REVISION, branch_info["revision"]),
        },
    )
    assert_contains(resp, dir_swhid)

    rev_swhid = gen_swhid(
        ObjectType.REVISION,
        branch_info["revision"],
        metadata={
            "origin": origin["url"],
            "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
        },
    )
    assert_contains(resp, rev_swhid)

    snp_swhid = gen_swhid(
        ObjectType.SNAPSHOT, snapshot["id"], metadata={"origin": origin["url"],},
    )
    assert_contains(resp, snp_swhid)


@given(origin_with_multiple_visits())
def test_content_origin_snapshot_release_browse(client, archive_data, origin):
    visits = archive_data.origin_visit_get(origin["url"])
    visit = random.choice(visits)
    snapshot = archive_data.snapshot_get(visit["snapshot"])
    snapshot_sizes = archive_data.snapshot_count_branches(visit["snapshot"])
    branches, releases, _ = process_snapshot_branches(snapshot)
    release_info = random.choice(releases)

    directory_content = archive_data.directory_ls(release_info["directory"])
    directory_file = random.choice(
        [e for e in directory_content if e["type"] == "file"]
    )

    url = reverse(
        "browse-content",
        url_args={"query_string": directory_file["checksums"]["sha1"]},
        query_params={
            "origin_url": origin["url"],
            "snapshot": snapshot["id"],
            "release": release_info["name"],
            "path": directory_file["name"],
        },
    )

    resp = check_html_get_response(
        client, url, status_code=200, template_used="browse/content.html"
    )

    _check_origin_snapshot_related_html(
        resp, origin, snapshot, snapshot_sizes, branches, releases
    )
    assert_contains(resp, directory_file["name"])
    assert_contains(resp, f"Release: {release_info['name']}")

    cnt_swhid = gen_swhid(
        ObjectType.CONTENT,
        directory_file["checksums"]["sha1_git"],
        metadata={
            "origin": origin["url"],
            "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
            "anchor": gen_swhid(ObjectType.RELEASE, release_info["id"]),
            "path": f"/{directory_file['name']}",
        },
    )
    assert_contains(resp, cnt_swhid)

    dir_swhid = gen_swhid(
        ObjectType.DIRECTORY,
        release_info["directory"],
        metadata={
            "origin": origin["url"],
            "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
            "anchor": gen_swhid(ObjectType.RELEASE, release_info["id"]),
        },
    )
    assert_contains(resp, dir_swhid)

    rev_swhid = gen_swhid(
        ObjectType.REVISION,
        release_info["target"],
        metadata={
            "origin": origin["url"],
            "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
        },
    )
    assert_contains(resp, rev_swhid)

    rel_swhid = gen_swhid(
        ObjectType.RELEASE,
        release_info["id"],
        metadata={
            "origin": origin["url"],
            "visit": gen_swhid(ObjectType.SNAPSHOT, snapshot["id"]),
        },
    )
    assert_contains(resp, rel_swhid)

    snp_swhid = gen_swhid(
        ObjectType.SNAPSHOT, snapshot["id"], metadata={"origin": origin["url"],},
    )
    assert_contains(resp, snp_swhid)


def _check_origin_snapshot_related_html(
    resp, origin, snapshot, snapshot_sizes, branches, releases
):
    browse_origin_url = reverse(
        "browse-origin", query_params={"origin_url": origin["url"]}
    )
    assert_contains(resp, f'href="{browse_origin_url}"')

    origin_branches_url = reverse(
        "browse-origin-branches",
        query_params={"origin_url": origin["url"], "snapshot": snapshot["id"]},
    )

    assert_contains(resp, f'href="{escape(origin_branches_url)}"')
    assert_contains(resp, f"Branches ({snapshot_sizes['revision']})")

    origin_releases_url = reverse(
        "browse-origin-releases",
        query_params={"origin_url": origin["url"], "snapshot": snapshot["id"]},
    )

    assert_contains(resp, f'href="{escape(origin_releases_url)}"')
    assert_contains(resp, f"Releases ({snapshot_sizes['release']})")

    assert_contains(resp, '', count=len(branches))
    assert_contains(resp, '
', count=len(releases))


def _process_content_for_display(archive_data, content):
    content_data = archive_data.content_get_data(content["sha1"])

    mime_type, encoding = get_mimetype_and_encoding_for_content(content_data["data"])

    mime_type, encoding, content_data = _re_encode_content(
        mime_type, encoding, content_data["data"]
    )

    content_display = prepare_content_for_display(
        content_data, mime_type, content["path"]
    )

    assert type(content_display["content_data"]) == str

    return content_display